NAVNavbar
Logo
cURL php NodeJS Python cSharp

Content Duplicates

 
This endpoint will provide you with a list of pages that contain duplicate content from the page specified in the POST request. You will obtain the URL of each page, its IP address, size in bytes, meta tag info, server info, and relevant data.

The returned results are specific to the similarity parameter specified in the POST request. You can set this parameter from 0 to 4.

When set to 0, the API will return pages with content not similar (or minimally similar) to the content of the target page. When set to 4, the API will return pages with content highly similar to the content of the target page.

Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-dashboard

<?php
// Example: query the Backlinks "Content Duplicates" live endpoint for pages
// whose content duplicates that of the target page.
// You can download this file from here https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');
$api_url = 'https://api.dataforseo.com/';
// Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-dashboard
$client = new RestClient($api_url, null, 'login', 'password');
// Generic POST array; each element is one task.
$post_array = array();
// simple way to set a task
$post_array[] = array(
   "target" => "https://dataforseo.com", // page URL to look up duplicates for
   "similarity" => 4,                    // 0..4; 4 returns pages with highly similar content
   "limit" => 10                         // maximum number of returned pages
);
try {
   // POST /v3/backlinks/content_duplicates/live
   $result = $client->post('/v3/backlinks/content_duplicates/live', $post_array);
   print_r($result);
   // do something with post result
} catch (RestClientException $e) {
   // Print the failure details, one per line, surrounded by blank lines.
   echo "\n";
   print "HTTP code: {$e->getHttpCode()}\n";
   print "Error code: {$e->getCode()}\n";
   print "Message: {$e->getMessage()}\n";
   print  $e->getTraceAsString();
   echo "\n";
}
// Drop the reference to the client once the request is finished.
$client = null;
?>

The above command returns JSON structured like this:

{
    "version": "0.1.20220819",
    "status_code": 20000,
    "status_message": "Ok.",
    "time": "1.0552 sec.",
    "cost": 0.02003,
    "tasks_count": 1,
    "tasks_error": 0,
    "tasks": [
        {
            "id": "08301018-1535-0267-0000-a726b4434997",
            "status_code": 20000,
            "status_message": "Ok.",
            "time": "1.0016 sec.",
            "cost": 0.02003,
            "result_count": 1,
            "path": [
                "v3",
                "backlinks",
                "content_duplicates",
                "live"
            ],
            "data": {
                "api": "backlinks",
                "function": "content_duplicates",
                "target": "https://www.marthastewart.com/2226792/how-bathe-your-cat",
                "limit": 5,
                "similarity": 2
            },
            "result": [
                {
                    "target": "https://www.marthastewart.com/2226792/how-bathe-your-cat",
                    "similarity": 2,
                    "total_count": 1,
                    "items_count": 1,
                    "items": [
                        {
                            "type": "backlinks_content_duplicate",
                            "similarity": 2,
                            "main_domain": "assassinartist.co.uk",
                            "domain": "liva.assassinartist.co.uk",
                            "tld": "co.uk",
                            "page": "https://liva.assassinartist.co.uk/www.marthastewart.com/2226792/how-bathe-your-cat",
                            "ip": "104.21.9.4",
                            "first_visited": "2022-04-28 07:38:52 +00:00",
                            "prev_visited": null,
                            "fetch_time": "2022-04-28 07:38:52 +00:00",
                            "status_code": 200,
                            "location": null,
                            "size": 192736,
                            "encoded_size": 0,
                            "content_encoding": "br",
                            "media_type": "text/html",
                            "server": "cloudflare",
                            "meta": {
                                "title": "&##128543;&##128556;&##129297; How to Give a Cat a Bath | Martha Stewart",
                                "canonical": null,
                                "internal_links_count": 126,
                                "external_links_count": 3,
                                "images_count": 0,
                                "words_count": 931,
                                "page_spam_score": 25,
                                "social_media_tags": {
                                    "twitter:image": "https://liva.assassinartist.co.uk-content%2Fuploads%2Fsites%2F34%2F2021%2F04%2F07%2Fmartha-polaroid-cat-76PRrD5RKgO9gpXuR32f-g-0521.jpg",
                                    "og:image": "https://liva.assassinartist.co.uk-content%2Fuploads%2Fsites%2F34%2F2021%2F04%2F07%2Fmartha-polaroid-cat-76PRrD5RKgO9gpXuR32f-g-0521.jpg",
                                    "og:image:width": "996",
                                    "og:image:height": "498",
                                    "fb:app_id": "165082593548215",
                                    "fb:pages": "10053803772"
                                },
                                "h1": [
                                    "How to Bathe Your Cat—Plus, How Often You Actually Need to Do It"
                                ],
                                "h2": [
                                    "Top Navigation",
                                    "Profile Menu",
                                    "Explore Martha Stewart",
                                    "Brush Your Cat Regularly",
                                    "Only Clean as Necessary",
                                    "Use the Right Shampoo",
                                    "Wash Carefully",
                                    "Comments",
                                    "Share & More",
                                    "Comment on this project",
                                    "Learn More",
                                    "Connect",
                                    "Sign in",
                                    "View image"
                                ],
                                "h3": [
                                    "Account",
                                    "More",
                                    "Explore",
                                    "FOOD",
                                    "HOLIDAYS",
                                    "ENTERTAINING",
                                    "HOME",
                                    "GARDENING",
                                    "CLEANING & ORGANIZING",
                                    "WEDDINGS",
                                    "DIY",
                                    "BEAUTY & WELLNESS",
                                    "LIFE",
                                    "Follow Us"
                                ],
                                "images_alt": null,
                                "powered_by": null,
                                "language": "en",
                                "charset": "utf-8",
                                "platform_type": [
                                    "unknown"
                                ],
                                "technologies": {
                                    "cdn": "cloudflare"
                                }
                            }
                        }
                    ]
                }
            ]
        }
    ]
}

All POST data should be sent in the JSON format (UTF-8 encoding). The task setting is done using the POST method. When setting a task, you should send all task parameters in the task array of the generic POST array.

Description of the fields for setting a task:

Field name Type Description
target string page URL
required field
example:
"https://www.marthastewart.com/2226792/how-bathe-your-cat"
Note: you can specify only URLs in this field;
when sending multiple requests simultaneously, the URLs in this field must belong to the same domain to avoid errors
similarity integer content similarity score
you can set this score from 0 to 4;
when set to 0, the API will return pages with content not similar (or minimally similar) to the content of the target page;
when set to 4, the API will return pages with content highly similar to the content of the target page;
default value: 2
limit integer the maximum number of returned pages
optional field
default value: 100;
maximum value: 1000
offset integer offset in the results array of returned pages
optional field
default value: 0;
if you specify the 10 value, the first ten pages in the results array will be omitted and the data will be provided for the successive pages
filters array array of results filtering parameters
optional field
you can add several filters at once (8 filters maximum);
you should set a logical operator (and, or) between the conditions;
the following operators are supported:
=, <>, <, <=, >, >=, in, not_in, like, not_like, ilike, not_ilike
you can use the % operator with like and not_like to match any string of zero or more characters
example:
["meta.internal_links_count",">","1"]

[["meta.external_links_count",">","2"],
"and",
["meta.internal_links_count",">","10"]]

[["first_visited",">","2017-10-23 11:31:45 +00:00"],
"and",
[["title","like","%seo%"],"or",["page_spam_score",">","10"]]]

The full list of possible filters is available by this link.

order_by array results sorting rules
optional field
you can use the same values as in the filters array to sort the results;
possible sorting types:
asc – results will be sorted in ascending order;
desc – results will be sorted in descending order;
you should use a comma to set up a sorting type;
example:
["page_spam_score,desc"]
note that you can set no more than three sorting rules in a single request;
you should use a comma to separate several sorting rules;
example:
["page_spam_score,desc","words_count,asc"]
tag string user-defined task identifier
optional field
the character limit is 255
you can use this parameter to identify the task and match it with the result
you will find the specified tag value in the data array of the response

‌‌‌‌‌‌
As a response of the API server, you will receive JSON-encoded data containing a tasks array with the information specific to the set tasks.

Description of the fields in the results array:

Field name Type Description
version string the current version of the API
status_code integer general status code
you can find the full list of the response codes here
Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions
status_message string general informational message
you can find the full list of general informational messages here
time string execution time, seconds
cost float total tasks cost, USD
tasks_count integer the number of tasks in the tasks array
tasks_error integer the number of tasks in the tasks array returned with an error
tasks array array of tasks
        id string task identifier
unique task identifier in our system in the UUID format
        status_code integer status code of the task
generated by DataForSEO; can be within the following range: 10000-60000
you can find the full list of the response codes here
        status_message string informational message of the task
you can find the full list of general informational messages here
        time string execution time, seconds
        cost float cost of the task, USD
        result_count integer number of elements in the result array
        path array URL path
        data array contains the same parameters that you specified in the POST request
        result array array of results
            target string target in a POST array
            similarity integer content similarity score from the POST array
            total_count integer total number of relevant items in the database
            items_count integer number of items in the items array
            items array items array
                type string type of element = ‘backlinks_content_duplicate’
                similarity integer content similarity score
can take values from 0 to 4
                main_domain string main website domain
main website domain does not include subdomains
                domain string domain
domain where the page was found
                tld string top-level domain
top-level domain in the DNS root zone
                page string page URL
relevant page’s URL
                ip string Internet Protocol
                first_visited string date and time of the first page visit
date and time when our crawler visited this page for the first time;
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2017-01-24 13:20:59 +00:00
                prev_visited string previous to the most recent date when our crawler visited the page
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2017-01-24 13:20:59 +00:00
                fetch_time string most recent date and time when our crawler visited the page
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2017-01-24 13:20:59 +00:00
               status_code integer HTTP status code of the page
               location string location header
indicates the URL to redirect a page to if exists
               size integer indicates the page size, in bytes
               encoded_size integer page size after encoding
indicates the size of the encoded page, in bytes
               content_encoding string type of encoding
               media_type string types of media used to display the page
               server string server version
               meta object page meta data
                   title string page title
                   canonical string canonical page
                   internal_links_count integer number of internal links on the page
                   external_links_count integer number of external links on the page
                   images_count integer number of images on the page
                   words_count integer number of words on the page
                   page_spam_score integer spam score of the page
this metric indicates how spammy the page is, considering various signals;
learn more about how the score is calculated on this help center page
                   social_media_tags object social media tags found on the page
contains social media tags and their content
supported tags include but are not limited to Open Graph and Twitter card
                   h1 array h1 tag
content of h1 tags
                   h2 array h2 tag
content of h2 tags
                   h3 array h3 tag
content of h3 tags
                   images_alt array content of alt tags
                   powered_by array CMS details
                   language string page content language
example:
en
                   charset string character encoding
example:
utf-8
                   platform_type array type of a platform
                   technologies object website technologies
                       cdn string content delivery network

‌‌